
*******************************
********** Data prep **********
*******************************

* Load the raw payment data (path is taken from the global macro $data).
use "$data/paymentdata", clear

* Collapse the seven arms coded in treatmentall (0-6) into four groups:
*   0 -> 0,  1/2 -> 1,  3/4 -> 2,  5/6 -> 3.
* Any other value of treatmentall (including missing) is left missing.
generate treatmentidentity = 0 if treatmentall == 0
replace  treatmentidentity = 1 if inlist(treatmentall, 1, 2)
replace  treatmentidentity = 2 if inlist(treatmentall, 3, 4)
replace  treatmentidentity = 3 if inlist(treatmentall, 5, 6)

* One indicator per group: treatmentidentityd1, treatmentidentityd2, ...
tabulate treatmentidentity, generate(treatmentidentityd)

** COLLECTION EFFICIENCY **
* Per-customer totals and means of payment and invoice over windows of t,
* followed by payment/invoice ratios for each window.
*
* Each egen below is restricted with -if-, so at first it is filled only on
* the rows inside its window; the spreading loop further down copies the value
* to every row of the customer.
* NOTE(review): Stata treats a missing t as larger than any number, so rows
* with missing t satisfy t>=1 and t>3 -- confirm t is never missing.

* Window 1 <= t <= 3
foreach flow in payment invoice {
    bysort customer: egen interventionsum`flow' = total(`flow') if inrange(t, 1, 3)
}

* Window t >= 1
foreach flow in payment invoice {
    bysort customer: egen sum`flow' = total(`flow') if t >= 1
}

* Window t < 1
foreach flow in payment invoice {
    bysort customer: egen presum`flow' = total(`flow') if t < 1
}

* Window t > 3
foreach flow in payment invoice {
    bysort customer: egen postsum`flow' = total(`flow') if t > 3
}

* Mean over the window t < 1
foreach flow in payment invoice {
    bysort customer: egen premean`flow' = mean(`flow') if t < 1
}

* Spread every aggregate to all rows of the customer. Within a customer the
* non-missing values of an aggregate are identical, so their mean is that
* value; customers with no row in the window stay missing.
local aggregates premeanpayment premeaninvoice          ///
                 sumpayment suminvoice                  ///
                 presumpayment presuminvoice            ///
                 interventionsumpayment interventionsuminvoice ///
                 postsumpayment postsuminvoice
foreach agg of local aggregates {
    tempvar spread
    bysort customer: egen `spread' = mean(`agg')
    replace `agg' = `spread'
    drop `spread'
}

* Share of invoiced amount that was paid, per window
* (a zero or missing denominator yields a missing ratio).
generate interventionratio = interventionsumpayment / interventionsuminvoice
generate ratio             = sumpayment / suminvoice
generate preratio          = presumpayment / presuminvoice
generate postratio         = postsumpayment / postsuminvoice


** 99% winsor **
* Create a winsorized copy <name>w1 of each ratio and aggregate.
* -winsor- is a community-contributed command, not part of official Stata.
* NOTE(review): p(0.01) presumably winsorizes the 1% most extreme values in
* each tail -- confirm against the help file of the installed version.
* The list order fixes the order in which the new variables are created.
local towinsor interventionratio ratio preratio          ///
               sumpayment suminvoice                     ///
               presumpayment presuminvoice               ///
               interventionsumpayment                    ///
               premeanpayment premeaninvoice             ///
               interventionsuminvoice                    ///
               postsumpayment postsuminvoice postratio
foreach v of local towinsor {
    winsor `v', gen(`v'w1) p(0.01)
}



* stickerplacement codes: 1 = outside, 2 = inside, 3 = mailbox.
* visible is 1 for outside, 0 for inside/mailbox, and missing for any other
* value of stickerplacement.
generate visible = 0 if inlist(stickerplacement, 2, 3)
replace  visible = 1 if stickerplacement == 1


** Housing Quality **
* 0/1 indicators that are never missing: a housequality value outside the
* listed codes (including missing) gives 0 on all three.
generate lowquality  = inlist(housequality, 0, 1, 2)
generate highquality = inlist(housequality, 3, 4)
generate missquality = (housequality == 99)


** Visibility of Treatment **
* Same coding as above, but as 0/1 indicators that are never missing;
* stickerplacement == 0 is flagged by missvisible.
generate lowvisible  = inlist(stickerplacement, 2, 3)   // 3 = mailbox, 2 = inside
generate highvisible = (stickerplacement == 1)          // 1 = outside
generate missvisible = (stickerplacement == 0)


* One indicator per observed level of each categorical variable, named
* <variable>dummy1, <variable>dummy2, ...
foreach catvar in treatmentall team firststicker prepaymentsmade {
    tabulate `catvar', generate(`catvar'dummy)
}

* Copy of prepaymentsmade with 11 and 12 folded into 10. This is necessary
* because of perfect collinearity: there are no customers with 11 or 12
* pre-intervention payments who make 0 payments afterwards.
generate prepaymentsmadeB = cond(inlist(prepaymentsmade, 11, 12), 10, prepaymentsmade)
tabulate prepaymentsmadeB, generate(prepaymentsmadedummyB)

* Drop inactive customers (rows with no pre-period invoice total) together
* with rows that have no treatment assignment. Only the system missing value
* (.) is tested, not extended missing values.
drop if presuminvoice == . | treatmentall == .

* Drop rows without lagged debt: first month of being a customer, so the
* covariates cannot be controlled for.
drop if lastdebt_demeanedw1 == .


* Write the analysis-ready dataset, overwriting any existing file.
save "$data/paymentdata_ready.dta", replace



